
pacman::p_load(dplyr)

#############################
# fgzw1p to wide format
#############################

# Split the person-level wave-1 file into its two parts (W1T1 / W1T2).
# Within each part, drop every variable whose observations are all negative
# (negative codes mark missing values; they are recoded to NA further below).
# NOTE(review): all(. < 0) evaluates to NA when a column mixes NA with
# otherwise all-negative values -- confirm the raw data contain no NA.
w1t1 <- fgzw1p %>%
  filter(wave == "W1T1") %>%
  select_if(~ !all(. < 0))

w1t2 <- fgzw1p %>%
  filter(wave == "W1T2") %>%
  select_if(~ !all(. < 0))

# Indicator variable: participated in wave 1, part 2.
w1t2$w1t2 <- rep(1, nrow(w1t2))

# Append the part suffix to every variable except the merge keys. The keys
# are identified by name, so the result does not depend on column position.
names(w1t1) <- ifelse(
  names(w1t1) %in% c("pid", "hid", "cid"),
  names(w1t1),
  paste0(names(w1t1), "_w1t1")
)
names(w1t2) <- ifelse(
  names(w1t2) %in% c("pid", "hid", "cid"),
  names(w1t2),
  paste0(names(w1t2), "_w1t2")
)

# Full outer merge of both parts into one wide row per person.
# cid - original household number (extra, usually same as hid),
# hid - new household number
fgzw1 <- merge(w1t1, w1t2, by = c("pid", "hid", "cid"), all = TRUE)


#############################
# merge with household data fgzw1h
#############################

# Household file: one subset per part (W1T1 / W1T2), keeping only variables
# that are not negative (i.e. missing) in every observation.
w1t1 <- fgzw1h %>%
  filter(wave == "W1T1") %>%
  select_if(~ !all(. < 0))

w1t2 <- fgzw1h %>%
  filter(wave == "W1T2") %>%
  select_if(~ !all(. < 0))

# Suffix every variable except the household keys (matched by name).
names(w1t1) <- ifelse(
  names(w1t1) %in% c("hid", "cid"),
  names(w1t1),
  paste0(names(w1t1), "_hw1t1")
)
names(w1t2) <- ifelse(
  names(w1t2) %in% c("hid", "cid"),
  names(w1t2),
  paste0(names(w1t2), "_hw1t2")
)

# fgzw1h is overwritten with its wide (one row per household) version.
fgzw1h <- merge(w1t1, w1t2, by = c("hid", "cid"), all = TRUE)

# The data frame called `merge` does not break the merge() call below:
# when resolving a call, R skips bindings that are not functions.
merge <- fgzw1h %>% arrange(hid)
fgzw1 <- fgzw1 %>% arrange(hid)
fgzw1 <- merge(fgzw1, merge, by = c("hid", "cid"), all = TRUE)

#############################
# merge with hgen
#############################

# hgen: one subset per part (W1T1 / W1T2), keeping only variables that are
# not negative (i.e. missing) in every observation.
w1t1 <- hgen %>%
  filter(wave == "W1T1") %>%
  select_if(~ !all(. < 0))

w1t2 <- hgen %>%
  filter(wave == "W1T2") %>%
  select_if(~ !all(. < 0))

# Suffix every variable except the household keys (matched by name).
names(w1t1) <- ifelse(
  names(w1t1) %in% c("hid", "cid"),
  names(w1t1),
  paste0(names(w1t1), "_hgenw1t1")
)
names(w1t2) <- ifelse(
  names(w1t2) %in% c("hid", "cid"),
  names(w1t2),
  paste0(names(w1t2), "_hgenw1t2")
)

# hgen is overwritten with its wide version.
hgen <- merge(w1t1, w1t2, by = c("hid", "cid"), all = TRUE)

# Attach the wide hgen variables to the person data via the household keys.
merge <- hgen %>% arrange(hid)
fgzw1 <- fgzw1 %>% arrange(hid)
fgzw1 <- merge(fgzw1, merge, by = c("hid", "cid"), all = TRUE)


#############################
# merge with phrf #(weights)
#############################


# phrf (weights): one subset per part (W1T1 / W1T2), keeping only variables
# that are not negative in every observation (the test is `< 0`, so it
# covers every negative code, not just -8).
w1t1 <- phrf %>%
  filter(wave == "W1T1") %>%
  select_if(~ !all(. < 0))

w1t2 <- phrf %>%
  filter(wave == "W1T2") %>%
  select_if(~ !all(. < 0))

# Suffix every variable except the person key (matched by name).
names(w1t1) <- ifelse(
  names(w1t1) %in% "pid",
  names(w1t1),
  paste0(names(w1t1), "_phrfw1t1")
)
names(w1t2) <- ifelse(
  names(w1t2) %in% "pid",
  names(w1t2),
  paste0(names(w1t2), "_phrfw1t2")
)

# phrf is overwritten with its wide version.
phrf <- merge(w1t1, w1t2, by = "pid", all = TRUE)

# Attach the wide phrf variables to the person data via pid.
merge <- phrf %>% arrange(pid)
fgzw1 <- fgzw1 %>% arrange(pid)
fgzw1 <- merge(fgzw1, merge, by = "pid", all = TRUE)

#############################
# merge with pgen
#############################

# pgen: one subset per part (W1T1 / W1T2), keeping only variables that are
# not negative in every observation (the test is `< 0`, so it covers every
# negative code, not just -8).
w1t1 <- pgen %>%
  filter(wave == "W1T1") %>%
  select_if(~ !all(. < 0))

w1t2 <- pgen %>%
  filter(wave == "W1T2") %>%
  select_if(~ !all(. < 0))

# Suffix every variable except the merge keys (matched by name).
names(w1t1) <- ifelse(
  names(w1t1) %in% c("pid", "hid", "cid"),
  names(w1t1),
  paste0(names(w1t1), "_pgenw1t1")
)
names(w1t2) <- ifelse(
  names(w1t2) %in% c("pid", "hid", "cid"),
  names(w1t2),
  paste0(names(w1t2), "_pgenw1t2")
)

# pgen is overwritten with its wide version.
pgen <- merge(w1t1, w1t2, by = c("pid", "hid", "cid"), all = TRUE)

# Attach the wide pgen variables to the person data via all three keys.
merge <- pgen %>% arrange(pid)
fgzw1 <- fgzw1 %>% arrange(pid)
fgzw1 <- merge(fgzw1, merge, by = c("pid", "hid", "cid"), all = TRUE)


#############################
# merge with household meta information
#############################

# hhrf (household meta information): one subset per part (W1T1 / W1T2),
# keeping only variables that are not negative in every observation.
w1t1 <- hhrf %>%
  filter(wave == "W1T1") %>%
  select_if(~ !all(. < 0))

w1t2 <- hhrf %>%
  filter(wave == "W1T2") %>%
  select_if(~ !all(. < 0))

# Suffix every variable except the household key (matched by name).
names(w1t1) <- ifelse(
  names(w1t1) %in% "hid",
  names(w1t1),
  paste0(names(w1t1), "_hhrfw1t1")
)
names(w1t2) <- ifelse(
  names(w1t2) %in% "hid",
  names(w1t2),
  paste0(names(w1t2), "_hhrfw1t2")
)

# hhrf is overwritten with its wide version.
hhrf <- merge(w1t1, w1t2, by = "hid", all = TRUE)

# Attach the wide hhrf variables to the person data via hid.
merge <- hhrf %>% arrange(hid)
fgzw1 <- fgzw1 %>% arrange(hid)
fgzw1 <- merge(fgzw1, merge, by = "hid", all = TRUE)


#############################
# check if no variables with same name were merged
#############################
# fgzw1_ <- fgzw1 %>%
#   select(ends_with(".x"))
# names(fgzw1_)
# rm(fgzw1_)

#############################
# declare missings 
#############################

# Recode every negative value to NA, in all columns of the wave-1 data.
# ifelse() builds its result from the test vector, so attributes of the
# original column (e.g. its class) are not carried over.
fgzw1 <- mutate_all(
  fgzw1,
  function(column) ifelse(column < 0, NA, column)
)


#############################
# merge with wave 2 
#############################

# Wave-2 person file: recode negative values to NA and tag rows as wave 2.
merge <- as.data.frame(fgzw2p) %>%
  mutate_all(~ ifelse(. < 0, NA, .)) %>%
  mutate(wave = "w2")
# Superseded version (merged on pid, hid and cid), kept for reference:
# summary(merge$hid)
# merge <- merge[c(3,1,2,4:ncol(merge))] # reorder: pid as first variable
# names(merge)[-c(1,2,3)] <- paste0(names(merge)[-c(1,2,3)], "_w2") # add string to cid and hid as well
# 
# merge <- arrange(merge,pid)
# fgzw1 <- arrange(fgzw1,pid)
# fgz <- merge(fgzw1, merge, by = c("pid", "hid", "cid"), all = T)

# new code 18 nov 2024:
# Move pid to the front by name (instead of by column position) and suffix
# every other variable -- including hid and cid -- with "_w2".
merge <- select(merge, pid, everything())
names(merge)[-1] <- paste0(names(merge)[-1], "_w2")

# Full outer merge on pid only; wave-2 hid/cid stay as hid_w2 / cid_w2.
merge <- merge %>% arrange(pid)
fgzw1 <- fgzw1 %>% arrange(pid)
fgz <- merge(fgzw1, merge, by = "pid", all = TRUE)


# NOTE(review): this copy is not used again in the visible script before it
# is removed during clean-up -- confirm whether it is still needed.
fgzw2 <- fgzw2p

# table(fgz$wave_w2,fgz$wave_w1t1,useNA="ifany") # 274 persons in wave 2 that cannot be matched


#############################
# merge with wave 2 - household information
#############################

# Wave-2 household file: recode negative values to NA and tag the rows.
merge <- fgzw2h %>%
  mutate_all(~ ifelse(. < 0, NA, .)) %>%
  mutate(wave = "w2h")

# Superseded version (merged on pid, hid and cid), kept for reference:
# merge <- merge[c(3,1,2,4:ncol(merge))] # reorder: pid as first variable
# names(merge)[-c(1,2,3)] <- paste0(names(merge)[-c(1,2,3)], "_w2h") # add string to cid and hid as well
# 
# merge <- arrange(merge,pid)
# fgz <- arrange(fgz,pid)
# fgz <- merge(fgz, merge, by = c("pid", "hid", "cid"), all = T)
 

# new code 18 nov 2024:

# Move the third column to the front (pid, according to the original
# comment) and suffix every variable, keys included, with "_w2h".
merge <- merge[c(3, 1, 2, 4:ncol(merge))]
names(merge) <- paste0(names(merge), "_w2h")

# Create the join key under the name used in the person data (hid_w2) and
# drop the person id, which is not needed for a household-level merge.
merge <- merge %>%
  mutate(hid_w2 = hid_w2h) %>%
  select(-pid_w2h)

# NOTE(review): rows are duplicated if fgzw2h holds more than one row per
# hid, and merge() by default matches NA keys with each other -- confirm
# that hid is unique and non-missing in fgzw2h.
merge <- merge %>% arrange(hid_w2)
fgz <- fgz %>% arrange(hid_w2)
fgz <- merge(fgz, merge, by = "hid_w2", all = TRUE)


#############################
# merge with wave 2 - weights phrf
#############################

# merge <- fgzw2phrf %>%
#   mutate_all(~ ifelse(. < 0, NA, .)) %>%
#   mutate(wave = "w2phrf")
# 
# merge <- merge[c(2, 1,3:ncol(merge))] # reorder: pid as first variable
# names(merge)[-c(1,2)] <- paste0(names(merge)[-c(1,2)], "_w2phrf") # add string to cid and hid as well
# 
# merge <- arrange(merge,pid)
# fgz <- arrange(fgz,pid)
# fgz <- merge(fgz, merge, by = c("pid", "hid"), all = T)


#############################
# merge with wave 3 
#############################

# Wave-3 person file: recode negative values to NA and tag rows as wave 3.
merge <- as.data.frame(fgzw3p) %>%
  mutate_all(~ ifelse(. < 0, NA, .)) %>%
  mutate(wave = "w3")

# Superseded version (merged on pid, hid and cid), kept for reference:
# # summary(merge$hid)
# merge <- merge[c(3,1,2,4:ncol(merge))] # reorder: pid as first variable
# names(merge)[-c(1,2,3)] <- paste0(names(merge)[-c(1,2,3)], "_w3") # add string to cid and hid as well
# 
# merge <- arrange(merge,pid)
# fgz <- arrange(fgz,pid)
# #fgz <- merge(fgzw1, merge, by = c("pid", "hid", "cid"), all = T)
# fgz <- merge(fgz, merge, by = c("pid", "hid", "cid"), all = T)
# # table(fgz$wave_w3,fgz$wave_w1t1,useNA="ifany") # 420 persons in wave 3 that cannot be matched

# new code 18 nov 2024: 
# Move pid to the front by name (instead of by column position) and suffix
# every other variable -- including hid and cid -- with "_w3".
merge <- select(merge, pid, everything())
names(merge)[-1] <- paste0(names(merge)[-1], "_w3")

# Full outer merge on pid only; wave-3 hid/cid stay as hid_w3 / cid_w3.
merge <- merge %>% arrange(pid)
fgz <- fgz %>% arrange(pid)
fgz <- merge(fgz, merge, by = "pid", all = TRUE)



# Clean up the workspace: remove the temporary objects and the input data
# sets listed below. The merged result `fgz` is not in the list and stays.
# rm() only warns (it does not stop) for names that do not exist.
rm(list = c(
  "w1t1", "w1t2", "merge",
  "fgzw1h", "fgzw1p",
  "hgen", "pgen", "hhrf", "phrf",
  "fgzw2h", "fgzw2p", "fgzw2hhrf", "fgzw2phrf",
  "fgzvor", "fgzw1", "fgzw2", "fgzw3p"
))
